In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.ticker as mtick
plt.style.use('fivethirtyeight')
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
In [2]:
os.chdir ('C:\\Users\\win 10\\Documents\\End To End Project\\Data Science Project\\ZOMATO RATING\\')
os.getcwd()
Out[2]:
'C:\\Users\\win 10\\Documents\\End To End Project\\Data Science Project\\ZOMATO RATING'
In [3]:
data = pd.read_csv('zomato.csv')
display(data)
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city)
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51712 https://www.zomato.com/bangalore/best-brews-fo... Four Points by Sheraton Bengaluru, 43/3, White... Best Brews - Four Points by Sheraton Bengaluru... No No 3.6 /5 27 080 40301477 Whitefield Bar NaN Continental 1,500 [('Rated 5.0', "RATED\n Food and service are ... [] Pubs and bars Whitefield
51713 https://www.zomato.com/bangalore/vinod-bar-and... Number 10, Garudachar Palya, Mahadevapura, Whi... Vinod Bar And Restaurant No No NaN 0 +91 8197675843 Whitefield Bar NaN Finger Food 600 [] [] Pubs and bars Whitefield
51714 https://www.zomato.com/bangalore/plunge-sherat... Sheraton Grand Bengaluru Whitefield Hotel & Co... Plunge - Sheraton Grand Bengaluru Whitefield H... No No NaN 0 NaN Whitefield Bar NaN Finger Food 2,000 [] [] Pubs and bars Whitefield
51715 https://www.zomato.com/bangalore/chime-sherato... Sheraton Grand Bengaluru Whitefield Hotel & Co... Chime - Sheraton Grand Bengaluru Whitefield Ho... No Yes 4.3 /5 236 080 49652769 ITPL Main Road, Whitefield Bar Cocktails, Pizza, Buttermilk Finger Food 2,500 [('Rated 4.0', 'RATED\n Nice and friendly pla... [] Pubs and bars Whitefield
51716 https://www.zomato.com/bangalore/the-nest-the-... ITPL Main Road, KIADB Export Promotion Industr... The Nest - The Den Bengaluru No No 3.4 /5 13 +91 8071117272 ITPL Main Road, Whitefield Bar, Casual Dining NaN Finger Food, North Indian, Continental 1,500 [('Rated 5.0', 'RATED\n Great ambience , look... [] Pubs and bars Whitefield

51717 rows × 17 columns

In [4]:
display(data.shape)
(51717, 17)
In [5]:
#checking the data types
print (data.dtypes )
url                            object
address                        object
name                           object
online_order                   object
book_table                     object
rate                           object
votes                           int64
phone                          object
location                       object
rest_type                      object
dish_liked                     object
cuisines                       object
approx_cost(for two people)    object
reviews_list                   object
menu_item                      object
listed_in(type)                object
listed_in(city)                object
dtype: object
In [6]:
#Checking null values
print (data.isna().sum())
url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64
In [7]:
#Deleting Unnnecessary Columns
df=data.drop(['url','phone','dish_liked' ],axis=1) #Dropping the column like "phone" and "url" and saving the new dataset as "df
In [8]:
display(df.head())
address name online_order book_table rate votes location rest_type cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city)
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 Banashankari Quick Bites South Indian, North Indian 300 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
In [9]:
# Check for Duplicate Records
df.duplicated().sum()
Out[9]:
43
In [10]:
# Drop Duplicates
df.drop_duplicates(inplace=True)
In [11]:
display(df)
address name online_order book_table rate votes location rest_type cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city)
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 Banashankari Quick Bites South Indian, North Indian 300 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51712 Four Points by Sheraton Bengaluru, 43/3, White... Best Brews - Four Points by Sheraton Bengaluru... No No 3.6 /5 27 Whitefield Bar Continental 1,500 [('Rated 5.0', "RATED\n Food and service are ... [] Pubs and bars Whitefield
51713 Number 10, Garudachar Palya, Mahadevapura, Whi... Vinod Bar And Restaurant No No NaN 0 Whitefield Bar Finger Food 600 [] [] Pubs and bars Whitefield
51714 Sheraton Grand Bengaluru Whitefield Hotel & Co... Plunge - Sheraton Grand Bengaluru Whitefield H... No No NaN 0 Whitefield Bar Finger Food 2,000 [] [] Pubs and bars Whitefield
51715 Sheraton Grand Bengaluru Whitefield Hotel & Co... Chime - Sheraton Grand Bengaluru Whitefield Ho... No Yes 4.3 /5 236 ITPL Main Road, Whitefield Bar Finger Food 2,500 [('Rated 4.0', 'RATED\n Nice and friendly pla... [] Pubs and bars Whitefield
51716 ITPL Main Road, KIADB Export Promotion Industr... The Nest - The Den Bengaluru No No 3.4 /5 13 ITPL Main Road, Whitefield Bar, Casual Dining Finger Food, North Indian, Continental 1,500 [('Rated 5.0', 'RATED\n Great ambience , look... [] Pubs and bars Whitefield

51674 rows × 14 columns

In [12]:
#Remove all the NaN values records from the dataset
df.dropna(how='any',inplace=True)
display (df.isnull().sum())
address                        0
name                           0
online_order                   0
book_table                     0
rate                           0
votes                          0
location                       0
rest_type                      0
cuisines                       0
approx_cost(for two people)    0
reviews_list                   0
menu_item                      0
listed_in(type)                0
listed_in(city)                0
dtype: int64
In [13]:
# Display Column Names
display(df.columns)
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
       'reviews_list', 'menu_item', 'listed_in(type)', 'listed_in(city)'],
      dtype='object')
In [14]:
# Rename Column Names
df = df.rename(columns={'approx_cost(for two people)':'cost','listed_in(type)':'type',
                                  'listed_in(city)':'city'})
display (df.columns)
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'cost', 'reviews_list',
       'menu_item', 'type', 'city'],
      dtype='object')
In [15]:
display (df)
address name online_order book_table rate votes location rest_type cuisines cost reviews_list menu_item type city
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 Banashankari Quick Bites South Indian, North Indian 300 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51709 136, SAP Labs India, KIADB Export Promotion In... The Farm House Bar n Grill No No 3.7 /5 34 Whitefield Casual Dining, Bar North Indian, Continental 800 [('Rated 4.0', 'RATED\n Ambience- Big and spa... [] Pubs and bars Whitefield
51711 139/C1, Next To GR Tech Park, Pattandur Agraha... Bhagini No No 2.5 /5 81 Whitefield Casual Dining, Bar Andhra, South Indian, Chinese, North Indian 800 [('Rated 4.0', 'RATED\n A fine place to chill... [] Pubs and bars Whitefield
51712 Four Points by Sheraton Bengaluru, 43/3, White... Best Brews - Four Points by Sheraton Bengaluru... No No 3.6 /5 27 Whitefield Bar Continental 1,500 [('Rated 5.0', "RATED\n Food and service are ... [] Pubs and bars Whitefield
51715 Sheraton Grand Bengaluru Whitefield Hotel & Co... Chime - Sheraton Grand Bengaluru Whitefield Ho... No Yes 4.3 /5 236 ITPL Main Road, Whitefield Bar Finger Food 2,500 [('Rated 4.0', 'RATED\n Nice and friendly pla... [] Pubs and bars Whitefield
51716 ITPL Main Road, KIADB Export Promotion Industr... The Nest - The Den Bengaluru No No 3.4 /5 13 ITPL Main Road, Whitefield Bar, Casual Dining Finger Food, North Indian, Continental 1,500 [('Rated 5.0', 'RATED\n Great ambience , look... [] Pubs and bars Whitefield

43499 rows × 14 columns

In [16]:
display (df['cost'].unique())
array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
       '900', '200', '750', '150', '850', '100', '1,200', '350', '250',
       '950', '1,000', '1,500', '1,300', '199', '80', '1,100', '160',
       '1,600', '230', '130', '1,700', '1,400', '1,350', '2,200', '2,000',
       '1,800', '1,900', '180', '330', '2,500', '2,100', '3,000', '2,800',
       '3,400', '50', '40', '1,250', '3,500', '4,000', '2,400', '2,600',
       '1,450', '70', '3,200', '560', '240', '360', '6,000', '1,050',
       '2,300', '4,100', '120', '5,000', '3,700', '1,650', '2,700',
       '4,500'], dtype=object)
In [17]:
# Remove the comma separator in the number. Change the data type from string to number 
#df["cost"] = df["cost"].replace(",", "", regex=True)df
df['cost'] = df['cost'].apply(lambda x: x.replace(',','')) #Using lambda function to replace ',' from cost
df['cost'] = df['cost'].astype(float)
In [18]:
display (df['cost'].unique())
array([ 800.,  300.,  600.,  700.,  550.,  500.,  450.,  650.,  400.,
        900.,  200.,  750.,  150.,  850.,  100., 1200.,  350.,  250.,
        950., 1000., 1500., 1300.,  199.,   80., 1100.,  160., 1600.,
        230.,  130., 1700., 1400., 1350., 2200., 2000., 1800., 1900.,
        180.,  330., 2500., 2100., 3000., 2800., 3400.,   50.,   40.,
       1250., 3500., 4000., 2400., 2600., 1450.,   70., 3200.,  560.,
        240.,  360., 6000., 1050., 2300., 4100.,  120., 5000., 3700.,
       1650., 2700., 4500.])
In [19]:
display (df.dtypes)
address          object
name             object
online_order     object
book_table       object
rate             object
votes             int64
location         object
rest_type        object
cuisines         object
cost            float64
reviews_list     object
menu_item        object
type             object
city             object
dtype: object
In [20]:
#Reading unique values from the Rate column
df['rate'].unique()
Out[20]:
array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
In [21]:
# Number of records related to each unique value
display(df['rate'].value_counts().head(60))
NEW       2197
3.9/5     2089
3.7/5     2008
3.8/5     1997
3.9 /5    1865
3.8 /5    1819
3.7 /5    1799
3.6/5     1753
4.0/5     1597
4.0 /5    1547
3.6 /5    1533
4.1/5     1469
4.1 /5    1456
3.5/5     1423
3.5 /5    1340
3.4/5     1247
3.4 /5    1197
3.3/5     1147
4.2 /5    1141
3.3 /5    1125
4.2/5     1013
3.2/5      997
4.3 /5     910
3.1/5      851
3.2 /5     847
4.3/5      772
3.1 /5     699
4.4 /5     627
3.0/5      543
4.4/5      519
3.0 /5     447
2.9/5      427
4.5 /5     409
2.9 /5     374
2.8/5      302
2.8 /5     278
4.5/5      247
4.6 /5     175
2.7/5      167
2.6/5      140
2.7 /5     136
4.6/5      125
2.6 /5     109
4.7 /5      86
4.7/5       81
-           65
2.5 /5      56
2.5/5       44
4.8 /5      43
2.4/5       36
4.9 /5      30
2.4 /5      30
2.3/5       28
4.9/5       25
2.3 /5      23
4.8/5       23
2.2/5       19
2.1 /5      13
2.1/5       11
2.2 /5       7
Name: rate, dtype: int64
In [24]:
#There is one Unique value "NEW", get rid of all records (147 records) with value new in the rate column
df = df.loc[df.rate !='NEW',] 
display(df)
address name online_order book_table rate votes location rest_type cuisines cost reviews_list menu_item type city
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 Banashankari Casual Dining Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 Banashankari Quick Bites South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 Basavanagudi Casual Dining North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51709 136, SAP Labs India, KIADB Export Promotion In... The Farm House Bar n Grill No No 3.7 /5 34 Whitefield Casual Dining, Bar North Indian, Continental 800.0 [('Rated 4.0', 'RATED\n Ambience- Big and spa... [] Pubs and bars Whitefield
51711 139/C1, Next To GR Tech Park, Pattandur Agraha... Bhagini No No 2.5 /5 81 Whitefield Casual Dining, Bar Andhra, South Indian, Chinese, North Indian 800.0 [('Rated 4.0', 'RATED\n A fine place to chill... [] Pubs and bars Whitefield
51712 Four Points by Sheraton Bengaluru, 43/3, White... Best Brews - Four Points by Sheraton Bengaluru... No No 3.6 /5 27 Whitefield Bar Continental 1500.0 [('Rated 5.0', "RATED\n Food and service are ... [] Pubs and bars Whitefield
51715 Sheraton Grand Bengaluru Whitefield Hotel & Co... Chime - Sheraton Grand Bengaluru Whitefield Ho... No Yes 4.3 /5 236 ITPL Main Road, Whitefield Bar Finger Food 2500.0 [('Rated 4.0', 'RATED\n Nice and friendly pla... [] Pubs and bars Whitefield
51716 ITPL Main Road, KIADB Export Promotion Industr... The Nest - The Den Bengaluru No No 3.4 /5 13 ITPL Main Road, Whitefield Bar, Casual Dining Finger Food, North Indian, Continental 1500.0 [('Rated 5.0', 'RATED\n Great ambience , look... [] Pubs and bars Whitefield

41302 rows × 14 columns

In [25]:
#There is one Unique value "-", get rid of all records (147 records) with value new in the rate column
df = df.loc[df.rate !='-'] 
display(df)
address name online_order book_table rate votes location rest_type cuisines cost reviews_list menu_item type city
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 Banashankari Casual Dining Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 Banashankari Quick Bites South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 Basavanagudi Casual Dining North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
51709 136, SAP Labs India, KIADB Export Promotion In... The Farm House Bar n Grill No No 3.7 /5 34 Whitefield Casual Dining, Bar North Indian, Continental 800.0 [('Rated 4.0', 'RATED\n Ambience- Big and spa... [] Pubs and bars Whitefield
51711 139/C1, Next To GR Tech Park, Pattandur Agraha... Bhagini No No 2.5 /5 81 Whitefield Casual Dining, Bar Andhra, South Indian, Chinese, North Indian 800.0 [('Rated 4.0', 'RATED\n A fine place to chill... [] Pubs and bars Whitefield
51712 Four Points by Sheraton Bengaluru, 43/3, White... Best Brews - Four Points by Sheraton Bengaluru... No No 3.6 /5 27 Whitefield Bar Continental 1500.0 [('Rated 5.0', "RATED\n Food and service are ... [] Pubs and bars Whitefield
51715 Sheraton Grand Bengaluru Whitefield Hotel & Co... Chime - Sheraton Grand Bengaluru Whitefield Ho... No Yes 4.3 /5 236 ITPL Main Road, Whitefield Bar Finger Food 2500.0 [('Rated 4.0', 'RATED\n Nice and friendly pla... [] Pubs and bars Whitefield
51716 ITPL Main Road, KIADB Export Promotion Industr... The Nest - The Den Bengaluru No No 3.4 /5 13 ITPL Main Road, Whitefield Bar, Casual Dining Finger Food, North Indian, Continental 1500.0 [('Rated 5.0', 'RATED\n Great ambience , look... [] Pubs and bars Whitefield

41237 rows × 14 columns

In [26]:
# Display Unique Values
display (df['rate'].unique())
array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5', '4.5/5',
       '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5', '3.4 /5',
       '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5', '4.1 /5',
       '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5', '3.5 /5',
       '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5', '4.3 /5',
       '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5', '4.9 /5',
       '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5', '2.1 /5',
       '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
In [27]:
#Removing '/5' from Rates

df['rate'] = df['rate'].apply(lambda x: x.replace('/5',''))
display (df['rate'].unique())
array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
       '3.0', '3.2', '3.3', '2.8', '4.4', '4.3', '2.9', '3.5', '2.6',
       '3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2', '2.3',
       '3.4 ', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '3.7 ',
       '3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ', '3.2 ',
       '2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8', '4.6 ',
       '4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
       '2.0 ', '1.8 '], dtype=object)
In [28]:
#Restaurants with number of outlets 
df['name'].value_counts()
Out[28]:
Cafe Coffee Day                   86
Onesta                            85
Empire Restaurant                 69
Kanti Sweets                      68
Five Star Chicken                 68
                                  ..
Daafoodies                         1
I Siri Restaurant And Caterers     1
The Foodware Veg                   1
Hotel Thalassery                   1
SeeYa Restaurant                   1
Name: name, Length: 6602, dtype: int64
In [29]:
#Display the Top 20 restaurants 
df['name'].value_counts().head(20)
Out[29]:
Cafe Coffee Day      86
Onesta               85
Empire Restaurant    69
Kanti Sweets         68
Five Star Chicken    68
Just Bake            67
Baskin Robbins       62
Petoo                60
Pizza Hut            60
KFC                  60
Smoor                59
McDonald's           59
Domino's Pizza       59
Subway               59
Sweet Truth          58
Polar Bear           58
Faasos               56
Beijing Bites        56
Burger King          55
Keventers            54
Name: name, dtype: int64
In [30]:
#Create a data frame with top 20 restaurants 
chains=df['name'].value_counts()[:20]
display(chains)
Cafe Coffee Day      86
Onesta               85
Empire Restaurant    69
Kanti Sweets         68
Five Star Chicken    68
Just Bake            67
Baskin Robbins       62
Petoo                60
Pizza Hut            60
KFC                  60
Smoor                59
McDonald's           59
Domino's Pizza       59
Subway               59
Sweet Truth          58
Polar Bear           58
Faasos               56
Beijing Bites        56
Burger King          55
Keventers            54
Name: name, dtype: int64
In [31]:
# Visualisations- Bar chat with Top 20 restaurants  with number of outlets
plt.figure(figsize=(20,15))
chains=df['name'].value_counts()[:20]
sns.barplot(x=chains,y=chains.index,palette='deep')
plt.title("Most famous restaurants chains in Bangaluru")
plt.xlabel("Number of outlets")
plt.show()
In [32]:
#Restaurants with table booking
display(df['book_table'].value_counts())
No     34938
Yes     6299
Name: book_table, dtype: int64
In [33]:
plt.figure(figsize=(10,10))

sns.countplot(df['book_table'])
plt.title("Number of restaurants that have the option to book table",fontsize=25,color='purple')
plt.ylabel("Count",fontsize=20)
plt.xlabel("Book Table",fontsize=20)
plt.show()
In [34]:
#Restaurants delivering Online or not
sns.countplot(df['online_order'])
fig = plt.gcf()
fig.set_size_inches(10,10)
plt.title('Whether Restaurants deliver online or Not')
plt.show()
In [35]:
# Ratings distribution , most of the ratings are within 3.5 and 4.5
plt.figure(figsize=(15,7))

sns.distplot(df['rate'],bins=20)
Out[35]:
<AxesSubplot:xlabel='rate', ylabel='Density'>
In [36]:
#Count of ratings as between "1 and 2", "2 and 3", "3 and 4", and "4 and 5"
# Display Unique Rating
display(df['rate'].unique())
array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
       '3.0', '3.2', '3.3', '2.8', '4.4', '4.3', '2.9', '3.5', '2.6',
       '3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2', '2.3',
       '3.4 ', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '3.7 ',
       '3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ', '3.2 ',
       '2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8', '4.6 ',
       '4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
       '2.0 ', '1.8 '], dtype=object)
In [37]:
# Convert Rating from string to float
df['rate']=df['rate'].astype(float)
display(df['rate'].unique())
array([4.1, 3.8, 3.7, 3.6, 4.6, 4. , 4.2, 3.9, 3.1, 3. , 3.2, 3.3, 2.8,
       4.4, 4.3, 2.9, 3.5, 2.6, 3.4, 4.5, 2.5, 2.7, 4.7, 2.4, 2.2, 2.3,
       4.8, 4.9, 2.1, 2. , 1.8])
In [38]:
# Create Histogram
group= [1,2,3,4,5]
plt.hist (df['rate'],group,histtype = 'bar',rwidth =0.5,color = 'b')
plt.show()
In [39]:
# Number of Restaurants with Rating  Between 1 and 2
gr_1to2=((df['rate']>=1) & (df['rate']<2)).sum()
display (gr_1to2)
5
In [40]:
# Number of Restaurants with Rating  Between 2 and 3
gr_2to3=((df['rate']>=2) & (df['rate']<3)).sum()
display (gr_2to3)
2211
In [41]:
# Number of Restaurants with Rating  Between 3 and 4
gr_3to4=((df['rate']>=3) & (df['rate']<4)).sum()
display (gr_3to4)
26726
In [42]:
# Number of Restaurants with Rating  more than 4
gr_4to5=(df['rate']>=4).sum()
display (gr_4to5)
12295
In [43]:
import plotly.graph_objs as go
import plotly.offline as py
In [44]:
# Create a Pie Chart 
slices=[gr_1to2,gr_2to3,gr_3to4,gr_4to5]   

labels=['Rating 1 to 2','Rating 2 to 3','Rating 3 to 4','Rating >4']
colors = ['#ff3333','#c2c2d6','#6699ff']
plt.pie(slices,colors=colors, labels=labels, autopct='%1.0f%%', pctdistance=.5, labeldistance=1.2,shadow=True)
fig = plt.gcf()
plt.title("Percentage of Restaurants according to their ratings")

fig.set_size_inches(10,10)
plt.show()
In [45]:
display(df['type'].value_counts())
Delivery              20431
Dine-out              14062
Desserts               2709
Cafes                  1511
Drinks & nightlife     1045
Buffet                  847
Pubs and bars           632
Name: type, dtype: int64
In [46]:
#Service Type - count Plot
# Here the two main service types are Delivery and Dine-out

sns.countplot(df['type']).set_xticklabels(sns.countplot(df['type']).get_xticklabels(), rotation=90, ha="right")
fig = plt.gcf()
fig.set_size_inches(20,12)
plt.title('Type of Service')
plt.show()
In [47]:
# Display Unique Cost
two_people_cost = df['cost'].unique()
display(np.sort(two_people_cost)) 
array([  40.,   50.,   70.,   80.,  100.,  120.,  130.,  150.,  180.,
        199.,  200.,  230.,  240.,  250.,  300.,  330.,  350.,  400.,
        450.,  500.,  550.,  600.,  650.,  700.,  750.,  800.,  850.,
        900.,  950., 1000., 1050., 1100., 1200., 1250., 1300., 1350.,
       1400., 1450., 1500., 1600., 1650., 1700., 1800., 1900., 2000.,
       2100., 2200., 2300., 2400., 2500., 2600., 2700., 2800., 3000.,
       3200., 3400., 3500., 3700., 4000., 4100., 4500., 5000., 6000.])
In [48]:
#Number of Restaurants in each group 
df.groupby('cost').size()
Out[48]:
cost
40.0        8
50.0        6
70.0        1
80.0        1
100.0     636
         ... 
4000.0     29
4100.0      4
4500.0      2
5000.0      1
6000.0      2
Length: 63, dtype: int64
In [49]:
# Create a box plot
from plotly.offline import iplot
trace0=go.Box(y=df['cost'],name="accepting online orders",
              marker = dict(
        color = 'rgb(113, 10, 100)',
    ))
data=[trace0]
layout=go.Layout(title="Box plot of approximate cost",width=800,height=800,yaxis=dict(title="Price"))
fig=go.Figure(data=data,layout=layout)
py.iplot(fig)
In [50]:
plt.figure(figsize=(10,10))

ax=df.rest_type.value_counts()[:10]
label=df['rest_type'].value_counts()[:10].index
ax.plot(kind='pie',labels=label,autopct='%.2f')

plt.title("Type of Restaurant in City",fontsize=20,color='darkgreen')
plt.show()
In [52]:
plt.figure(figsize=(10,10))

ax=df.city.value_counts()[:10]
labels=df['city'].value_counts()[:10].index
plt.pie(ax,labels=labels,autopct='%.2f')
plt.title('number of restaurants in each area of bangalore',fontsize=20,color='darkblue')
plt.show()
In [55]:
# Restaurant Type and Count 
df['rest_type'].value_counts().head(50)
Out[55]:
Quick Bites                      13871
Casual Dining                     9608
Cafe                              3368
Dessert Parlor                    1850
Delivery                          1666
Takeaway, Delivery                1278
Casual Dining, Bar                1092
Bakery                             704
Bar                                640
Beverage Shop                      639
Food Court                         498
Bar, Casual Dining                 385
Lounge                             381
Pub                                351
Sweet Shop                         346
Fine Dining                        342
Casual Dining, Cafe                310
Beverage Shop, Quick Bites         239
Pub, Casual Dining                 236
Bakery, Quick Bites                225
Mess                               180
Cafe, Casual Dining                173
Sweet Shop, Quick Bites            171
Kiosk                              152
Cafe, Bakery                       146
Cafe, Dessert Parlor               144
Dessert Parlor, Cafe               144
Casual Dining, Pub                 127
Bakery, Dessert Parlor             127
Microbrewery, Casual Dining        121
Dessert Parlor, Quick Bites        118
Cafe, Quick Bites                   93
Takeaway                            79
Food Court, Quick Bites             78
Pub, Microbrewery                   76
Quick Bites, Sweet Shop             75
Beverage Shop, Dessert Parlor       75
Dessert Parlor, Beverage Shop       73
Quick Bites, Dessert Parlor         73
Bakery, Cafe                        72
Quick Bites, Beverage Shop          72
Food Truck                          68
Dessert Parlor, Bakery              67
Casual Dining, Microbrewery         47
Quick Bites, Bakery                 43
Microbrewery, Pub                   42
Pub, Cafe                           40
Fine Dining, Bar                    40
Club                                37
Lounge, Casual Dining               37
Name: rest_type, dtype: int64
In [56]:
# Create Graph with top 20 restaurant type 
#Casual Dining, Quick Bites and Cafe are the 3 most common types of Restaurants
plt.figure(figsize=(15,7))
rest=df['rest_type'].value_counts()[:20]
sns.barplot(rest,rest.index)
plt.title("Restaurant types")
plt.xlabel("count")
plt.show()
In [57]:
plt.figure(figsize=(15,7))
chains=df['name'].value_counts()[:20]
sns.barplot(x=chains,y=chains.index,palette='Set1')
plt.title("Most famous restaurant chains",size=20,pad=20)
plt.xlabel("Number of outlets",size=15)
plt.show()
In [58]:
# Create the Model 
# display top 5 records 
display(df.head())
address name online_order book_table rate votes location rest_type cuisines cost reviews_list menu_item type city
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1 787 Banashankari Casual Dining Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8 166 Basavanagudi Casual Dining North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
In [59]:
#Convert the online categorical variables into a numeric format
df.online_order[df.online_order == 'Yes'] = 1 
df.online_order[df.online_order == 'No'] = 0
In [60]:
df.online_order.value_counts()
Out[60]:
1    27081
0    14156
Name: online_order, dtype: int64
In [61]:
#Change the data type to Number 
df.online_order = pd.to_numeric(df.online_order)
display (df.head(20))
address name online_order book_table rate votes location rest_type cuisines cost reviews_list menu_item type city
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa 1 Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant 1 No 4.1 787 Banashankari Casual Dining Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe 1 No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana 0 No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village 0 No 3.8 166 Basavanagudi Casual Dining North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
5 37, 5-1, 4th Floor, Bosco Court, Gandhi Bazaar... Timepass Dinner 1 No 3.8 286 Basavanagudi Casual Dining North Indian 600.0 [('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/... [] Buffet Banashankari
6 19/1, New Timberyard Layout, Beside Satellite ... Rosewood International Hotel - Bar & Restaurant 0 No 3.6 8 Mysore Road Casual Dining North Indian, South Indian, Andhra, Chinese 800.0 [('Rated 5.0', 'RATED\n Awesome food ??Great ... [] Buffet Banashankari
7 2469, 3rd Floor, 24th Cross, Opposite BDA Comp... Onesta 1 Yes 4.6 2556 Banashankari Casual Dining, Cafe Pizza, Cafe, Italian 600.0 [('Rated 5.0', 'RATED\n I personally really l... [] Cafes Banashankari
8 1, 30th Main Road, 3rd Stage, Banashankari, Ba... Penthouse Cafe 1 No 4.0 324 Banashankari Cafe Cafe, Italian, Continental 700.0 [('Rated 3.0', "RATED\n I had been to this pl... [] Cafes Banashankari
9 2470, 21 Main Road, 25th Cross, Banashankari, ... Smacznego 1 No 4.2 504 Banashankari Cafe Cafe, Mexican, Italian, Momos, Beverages 550.0 [('Rated 4.0', "RATED\n Easy to locate\nVFM 3... [] Cafes Banashankari
10 12,29 Near PES University Back Gate, D'Souza N... Café Down The A... 1 No 4.1 402 Banashankari Cafe Cafe 500.0 [('Rated 4.0', 'RATED\n We ended up here on a... [] Cafes Banashankari
11 941, 3rd FLOOR, 21st Main, 22nd Cross, Banasha... Cafe Shuffle 1 Yes 4.2 150 Banashankari Cafe Cafe, Italian, Continental 600.0 [('Rated 1.0', "RATED\n \n\nHorrible. Not even... [] Cafes Banashankari
12 6th Block, 3rd Stage, Banashankari, Bangalore The Coffee Shack 1 Yes 4.2 164 Banashankari Cafe Cafe, Chinese, Continental, Italian 500.0 [('Rated 4.0', "RATED\n Food - 4/5\nAmbience ... [] Cafes Banashankari
13 111, Sapphire Toys Building, 100 Feet Ring Roa... Caf-Eleven 0 No 4.0 424 Banashankari Cafe Cafe, Continental 450.0 [('Rated 2.0', "RATED\n This is a hookah cafe... [] Cafes Banashankari
14 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe 1 No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Cafes Banashankari
15 2303, 21st Cross, K R Road, 2nd Stage, Banasha... Cafe Vivacity 1 No 3.8 90 Banashankari Cafe Cafe 650.0 [('Rated 2.0', 'RATED\n Not so good place as ... [] Cafes Banashankari
16 241, 4th Floor, 100 Feet Ring Road, Opposite K... Catch-up-ino 1 No 3.9 133 Banashankari Cafe Cafe, Fast Food, Continental, Chinese, Momos 800.0 [('Rated 1.0', "RATED\n This place is right o... [] Cafes Banashankari
17 405, 24th Cross, 9th Main, 2nd Stage, Banashan... Kirthi's Biryani 1 No 3.8 144 Banashankari Cafe Chinese, Cafe, Italian 700.0 [('Rated 3.0', "RATED\n New place.. though it... [] Cafes Banashankari
18 504, CJ Venkata Das Road, Padmanabhangar, 2nd ... T3H Cafe 0 No 3.9 93 Banashankari Cafe Cafe, Italian, American 300.0 [('Rated 4.0', "RATED\n Happy to see such a c... [] Cafes Banashankari
19 47, 48 &49, 3Rd Floor, 21st Main Road, 2nd Sta... 360 Atoms Restaurant And Cafe 1 No 3.1 13 Banashankari Cafe Cafe, Chinese, Continental, Italian 400.0 [('Rated 5.0', 'RATED\n Friendly staffs , nic... [] Cafes Banashankari
In [62]:
#change the string categorical into to a categorical int

df.book_table[df.book_table == 'Yes'] = 1 
df.book_table[df.book_table == 'No'] = 0
In [63]:
df.book_table = pd.to_numeric(df.book_table)
In [64]:
display(df.book_table.value_counts())
0    34938
1     6299
Name: book_table, dtype: int64
In [65]:
#Label encode the categorical variables to make it easier to build algorithm
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df.location = le.fit_transform(df.location)
df.rest_type = le.fit_transform(df.rest_type)
df.cuisines = le.fit_transform(df.cuisines)
df.menu_item = le.fit_transform(df.menu_item)
In [66]:
display (df.head(20))
address name online_order book_table rate votes location rest_type cuisines cost reviews_list menu_item type city
0 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa 1 1 4.1 775 1 24 1894 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... 8242 Buffet Banashankari
1 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant 1 0 4.1 787 1 24 816 800.0 [('Rated 4.0', 'RATED\n Had been here for din... 8242 Buffet Banashankari
2 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe 1 0 3.8 918 1 19 653 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... 8242 Buffet Banashankari
3 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana 0 0 3.7 88 1 73 2221 300.0 [('Rated 4.0', "RATED\n Great food and proper... 8242 Buffet Banashankari
4 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village 0 0 3.8 166 4 24 1921 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... 8242 Buffet Banashankari
5 37, 5-1, 4th Floor, Bosco Court, Gandhi Bazaar... Timepass Dinner 1 0 3.8 286 4 24 1585 600.0 [('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/... 8242 Buffet Banashankari
6 19/1, New Timberyard Layout, Beside Satellite ... Rosewood International Hotel - Bar & Restaurant 0 0 3.6 8 56 24 1938 800.0 [('Rated 5.0', 'RATED\n Awesome food ??Great ... 8242 Buffet Banashankari
7 2469, 3rd Floor, 24th Cross, Opposite BDA Comp... Onesta 1 1 4.6 2556 1 26 2028 600.0 [('Rated 5.0', 'RATED\n I personally really l... 8242 Cafes Banashankari
8 1, 30th Main Road, 3rd Stage, Banashankari, Ba... Penthouse Cafe 1 0 4.0 324 1 16 629 700.0 [('Rated 3.0', "RATED\n I had been to this pl... 8242 Cafes Banashankari
9 2470, 21 Main Road, 25th Cross, Banashankari, ... Smacznego 1 0 4.2 504 1 16 654 550.0 [('Rated 4.0', "RATED\n Easy to locate\nVFM 3... 8242 Cafes Banashankari
10 12,29 Near PES University Back Gate, D'Souza N... Café Down The A... 1 0 4.1 402 1 16 475 500.0 [('Rated 4.0', 'RATED\n We ended up here on a... 8242 Cafes Banashankari
11 941, 3rd FLOOR, 21st Main, 22nd Cross, Banasha... Cafe Shuffle 1 1 4.2 150 1 16 629 600.0 [('Rated 1.0', "RATED\n \n\nHorrible. Not even... 8242 Cafes Banashankari
12 6th Block, 3rd Stage, Banashankari, Bangalore The Coffee Shack 1 1 4.2 164 1 16 532 500.0 [('Rated 4.0', "RATED\n Food - 4/5\nAmbience ... 8242 Cafes Banashankari
13 111, Sapphire Toys Building, 100 Feet Ring Roa... Caf-Eleven 0 0 4.0 424 1 16 538 450.0 [('Rated 2.0', "RATED\n This is a hookah cafe... 8242 Cafes Banashankari
14 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe 1 0 3.8 918 1 19 653 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... 8242 Cafes Banashankari
15 2303, 21st Cross, K R Road, 2nd Stage, Banasha... Cafe Vivacity 1 0 3.8 90 1 16 475 650.0 [('Rated 2.0', 'RATED\n Not so good place as ... 8242 Cafes Banashankari
16 241, 4th Floor, 100 Feet Ring Road, Opposite K... Catch-up-ino 1 0 3.9 133 1 16 604 800.0 [('Rated 1.0', "RATED\n This place is right o... 8242 Cafes Banashankari
17 405, 24th Cross, 9th Main, 2nd Stage, Banashan... Kirthi's Biryani 1 0 3.8 144 1 16 720 700.0 [('Rated 3.0', "RATED\n New place.. though it... 8242 Cafes Banashankari
18 504, CJ Venkata Das Road, Padmanabhangar, 2nd ... T3H Cafe 0 0 3.9 93 1 16 623 300.0 [('Rated 4.0', "RATED\n Happy to see such a c... 8242 Cafes Banashankari
19 47, 48 &49, 3Rd Floor, 21st Main Road, 2nd Sta... 360 Atoms Restaurant And Cafe 1 0 3.1 13 1 16 532 400.0 [('Rated 5.0', 'RATED\n Friendly staffs , nic... 8242 Cafes Banashankari
In [67]:
my_data=df.iloc[:,[2,3,4,5,6,7,8,9,11]]
my_data.to_csv('Zomato_df.csv')
display (my_data)
online_order book_table rate votes location rest_type cuisines cost menu_item
0 1 1 4.1 775 1 24 1894 800.0 8242
1 1 0 4.1 787 1 24 816 800.0 8242
2 1 0 3.8 918 1 19 653 800.0 8242
3 0 0 3.7 88 1 73 2221 300.0 8242
4 0 0 3.8 166 4 24 1921 600.0 8242
... ... ... ... ... ... ... ... ... ...
41232 0 0 3.7 34 88 25 1785 800.0 8242
41233 0 0 2.5 81 88 25 101 800.0 8242
41234 0 0 3.6 27 88 6 866 1500.0 8242
41235 0 1 4.3 236 26 6 1207 2500.0 8242
41236 0 0 3.4 13 26 7 1231 1500.0 8242

41237 rows × 9 columns

In [68]:
#Create independent Variable 
x = df.iloc[:,[2,3,5,6,7,8,9,11]]
x.head()
Out[68]:
online_order book_table votes location rest_type cuisines cost menu_item
0 1 1 775 1 24 1894 800.0 8242
1 1 0 787 1 24 816 800.0 8242
2 1 0 918 1 19 653 800.0 8242
3 0 0 88 1 73 2221 300.0 8242
4 0 0 166 4 24 1921 600.0 8242
In [69]:
#Create Dependent Variable 
y = df['rate']
display(y)
0        4.1
1        4.1
2        3.8
3        3.7
4        3.8
        ... 
41232    3.7
41233    2.5
41234    3.6
41235    4.3
41236    3.4
Name: rate, Length: 41237, dtype: float64
In [70]:
# Train Test Split
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=.3,random_state=10)
In [71]:
# Create Linear Regression Model
from sklearn.linear_model import LinearRegression
lr_model=LinearRegression()
lr_model.fit(x_train,y_train)
Out[71]:
LinearRegression()
In [72]:
# Display Accuracy 
from sklearn.metrics import r2_score
y_pred=lr_model.predict(x_test)
display (r2_score(y_test,y_pred))
0.30080021393797163
In [73]:
# Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor
RF_Model=RandomForestRegressor(n_estimators=650,random_state=245,min_samples_leaf=.0001)
RF_Model.fit(x_train,y_train)
Out[73]:
RandomForestRegressor(min_samples_leaf=0.0001, n_estimators=650,
                      random_state=245)
In [74]:
y_predict=RF_Model.predict(x_test)
display (r2_score(y_test,y_predict))
0.8743074009718356
In [75]:
from sklearn.linear_model import Lasso
In [76]:
lr=Lasso()
lr
Out[76]:
Lasso()
In [77]:
lr.fit(x_train,y_train)
Out[77]:
Lasso()
In [78]:
y_predict=lr.predict(x_test)
In [79]:
display (r2_score(y_test,y_pred))
0.30080021393797163
In [80]:
from sklearn.svm import SVR
svr=SVR()
svr.fit(x_train,y_train)
Out[80]:
SVR()
In [81]:
y_predict=svr.predict(x_test)
display (r2_score(y_test,y_pred))
0.30080021393797163
In [82]:
import xgboost as xgb
xgb_model=xgb.XGBRegressor()
xgb_model.fit(x_train,y_train)
xgb_pred=xgb_model.predict(x_test)
display (r2_score(y_test,xgb_pred))
0.7391740232273454
In [83]:
from sklearn.neighbors import KNeighborsRegressor
knn =KNeighborsRegressor(n_neighbors =5 , metric = 'minkowski', p= 1)
knn.fit (x_train,y_train )
Out[83]:
KNeighborsRegressor(p=1)
In [84]:
y_pred =  knn.predict (x_test)
y_pred 
Out[84]:
array([3.94, 3.58, 3.  , ..., 3.66, 4.48, 3.26])
In [85]:
display (r2_score(y_test,y_pred))
0.673740950903428
In [87]:
#Ordinary Least Squares regression (OLS) is a common technique for estimating coefficients of linear regression equations 
#which describe the relationship between one or more independent quantitative variables and a dependent variable
x=x.astype('float64')
import statsmodels.api as sm
reg_ols = sm.OLS (endog = y, exog = x)
reg_ols = reg_ols.fit()
print (reg_ols.summary())
                                 OLS Regression Results                                
=======================================================================================
Dep. Variable:                   rate   R-squared (uncentered):                   0.960
Model:                            OLS   Adj. R-squared (uncentered):              0.960
Method:                 Least Squares   F-statistic:                          1.232e+05
Date:                Tue, 02 Aug 2022   Prob (F-statistic):                        0.00
Time:                        00:42:06   Log-Likelihood:                         -46482.
No. Observations:               41237   AIC:                                  9.298e+04
Df Residuals:                   41229   BIC:                                  9.305e+04
Df Model:                           8                                                  
Covariance Type:            nonrobust                                                  
================================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
online_order     0.9361      0.007    134.118      0.000       0.922       0.950
book_table       0.0076      0.013      0.572      0.567      -0.018       0.034
votes            0.0002    4.6e-06     34.097      0.000       0.000       0.000
location         0.0064      0.000     46.179      0.000       0.006       0.007
rest_type        0.0083      0.000     56.430      0.000       0.008       0.009
cuisines         0.0002   5.59e-06     35.589      0.000       0.000       0.000
cost             0.0008   1.01e-05     77.360      0.000       0.001       0.001
menu_item        0.0002   1.24e-06    179.839      0.000       0.000       0.000
==============================================================================
Omnibus:                       96.439   Durbin-Watson:                   1.516
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               95.812
Skew:                           0.110   Prob(JB):                     1.57e-21
Kurtosis:                       2.913   Cond. No.                     2.74e+04
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The condition number is large, 2.74e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [88]:
from sklearn.linear_model import LinearRegression
reg= LinearRegression()
reg.fit(x_train,y_train)
Out[88]:
LinearRegression()
In [89]:
y_pred= reg.predict(x_test)
print (y_pred)
[3.6554235  3.45375423 3.46836078 ... 3.53803275 4.72090704 3.45206852]
In [90]:
r2_score (y_test,y_pred)
Out[90]:
0.30080021393797163
In [91]:
from sklearn.tree import DecisionTreeRegressor
dt =DecisionTreeRegressor()
dt.fit(x,y)
Out[91]:
DecisionTreeRegressor()
In [92]:
y_pred= reg.predict(x_test)
print (y_pred)
r2_score (y_test,y_pred)
[3.6554235  3.45375423 3.46836078 ... 3.53803275 4.72090704 3.45206852]
Out[92]:
0.30080021393797163
In [93]:
from sklearn.preprocessing import PolynomialFeatures 
polynom = PolynomialFeatures(degree =1) 
polynom.fit_transform(x) 
Out[93]:
array([[1.000e+00, 1.000e+00, 1.000e+00, ..., 1.894e+03, 8.000e+02,
        8.242e+03],
       [1.000e+00, 1.000e+00, 0.000e+00, ..., 8.160e+02, 8.000e+02,
        8.242e+03],
       [1.000e+00, 1.000e+00, 0.000e+00, ..., 6.530e+02, 8.000e+02,
        8.242e+03],
       ...,
       [1.000e+00, 0.000e+00, 0.000e+00, ..., 8.660e+02, 1.500e+03,
        8.242e+03],
       [1.000e+00, 0.000e+00, 1.000e+00, ..., 1.207e+03, 2.500e+03,
        8.242e+03],
       [1.000e+00, 0.000e+00, 0.000e+00, ..., 1.231e+03, 1.500e+03,
        8.242e+03]])
In [94]:
y_pred= reg.predict(x_test)
print (y_pred)
[3.6554235  3.45375423 3.46836078 ... 3.53803275 4.72090704 3.45206852]
In [95]:
display (r2_score(y_test,y_pred))
0.30080021393797163
In [96]:
#Preparing Extra Tree Regression
from sklearn.ensemble import  ExtraTreesRegressor
ET_Model=ExtraTreesRegressor(n_estimators = 120)
ET_Model.fit(x_train,y_train)
Out[96]:
ExtraTreesRegressor(n_estimators=120)
In [97]:
y_predict=ET_Model.predict(x_test)
In [98]:
from sklearn.metrics import r2_score
display(r2_score(y_test,y_predict))
0.9323350619857341
In [99]:
#Use pickle to save our model so that we can use it later

import pickle 
# Saving model to disk
pickle.dump(ET_Model, open('model.pkl','wb'))
model=pickle.load(open('model.pkl','rb'))
In [100]:
model
Out[100]:
ExtraTreesRegressor(n_estimators=120)
In [101]:
model.predict(x)
Out[101]:
array([4.1, 4.1, 3.8, ..., 3.6, 4.3, 3.4])
In [ ]: